package com.ada.spark.datasource

import org.apache.spark.SparkConf
import org.apache.spark.sql.{SaveMode, SparkSession}

/**
 * Demo of round-tripping data through the Parquet data source with Spark SQL.
 *
 * The job reads a JSON file into a DataFrame, writes it out as Parquet
 * (overwriting any previous output), reads the Parquet data back, registers it
 * as a temporary view and runs a SQL query against that view.
 */
object ParquetTest {

    /**
     * Program entry point.
     *
     * @param args command-line arguments (not used)
     */
    def main(args: Array[String]): Unit = {
        // Input JSON file and Parquet output location used by this demo.
        val jsonPath = "format/people.json"
        val parquetPath = "hdfs://hadoop121:9000/people.parquet"

        // Create the Spark configuration object.
        val conf: SparkConf = new SparkConf().setAppName("ParquetTest").setMaster("local[*]")

        // Create the Spark SQL environment (session) object.
        val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()

        // Always stop the session, even when a step below throws, so the
        // underlying SparkContext and its resources are released.
        try {
            // Spark SQL's default data source format is Parquet; the source here
            // is JSON, so the JSON reader is selected explicitly.
            val peopleDF = spark.read.json(jsonPath)

            // Persist as Parquet, replacing any output left by a previous run.
            peopleDF.write.mode(SaveMode.Overwrite).parquet(parquetPath)

            // Read the Parquet data back and expose it to SQL as a temporary view.
            val parquetFileDF = spark.read.parquet(parquetPath)
            parquetFileDF.createOrReplaceTempView("parquetFile")

            // Select the names of people whose age is between 13 and 19 (inclusive).
            val namesDF = spark.sql("SELECT name FROM parquetFile WHERE age BETWEEN 13 AND 19")

            namesDF.show()
        } finally {
            spark.stop()
        }
    }
}
